Sneaker Finder v2.0 - Fast Sneaks
Learning the fastai API by refactoring an old tf/keras project
OVERVIEW
This is a project initiated while I was an Insight Data Science fellow. It grew out of my interest in making data-driven tools in the fashion/retail space in which I had most recently been working. The original, over-scoped idea was to make a shoe design tool which could quickly develop some initial sneakers based on choosing some examples and some text descriptors. Designs are constrained by the "latent space" defined (discovered?) by a database of shoe images. However, given the 3-week sprint allowed for development, I pared the tool down to a simple "aesthetic" recommender for sneakers, using the same idea of utilizing an embedding space defined by the database of shoe images.
Part 0: DATA
The data has been munged... link to details here [01_data.ipynb]
Part 3: ResNet feature extractor
embed database into feature space.
evaluate by simple logistic regression on classification.
# Load the pre-processed image table from data/<filename>.pkl
# (presumably the output of 01_data.ipynb — confirm).
filename = "zappos-50k-simplified_sort"
df = pd.read_pickle(f"data/{filename}.pkl")
import torchvision
Because we simply want to collect the features output by the model rather than do classification (or make some other decision), I replaced the classifier head with an identity mapper. The simple Identity nn.Module class makes this easy.
Finally, since we are calculating the features (the embedding) of over 30k images with the net, let's load the computations onto our GPU. We need to remember to do this in evaluation mode so that BatchNorm layers use their stored running statistics and dropout layers are disabled. [I forgot to do this initially and lost hours trying to figure out why I wasn't getting consistent results.] Setting param.requires_grad = False saves us memory since we aren't going to fit any weights for now, and protects us in case we forget to do a with torch.no_grad() before inference.
ASIDE: I'm running the compute on what I call my data-pizza oven: a Linux machine loaded with a powerful CPU, a cheap (but powerful) GPU, and a bunch of memory in the gutted shell of an old PowerMac G5 case (which I picked up at a garage sale for $25!). I call it the BrickOven Toaster. Check it out [here]
Later when we use the full FastAI API this should all be handled elegantly behind the scenes
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device  # notebook echo of the selected device
def get_ResNet_feature_net(to_cuda=False):
    """Build a frozen, pretrained ResNet-50 whose ``fc`` head is replaced by ``Identity()``.

    The width of the removed ``fc`` layer's input is printed. The returned
    network is in eval mode and none of its parameters require gradients.

    Args:
        to_cuda: when True, place the network on "cuda:0" if CUDA is
            available; otherwise (or when False) keep it on the CPU.

    Returns:
        The prepared network.
    """
    # following the pattern for MnetV2 but could use the fastai resnet instead (just need to remove fc)
    use_gpu = to_cuda and torch.cuda.is_available()
    target = torch.device("cuda:0" if use_gpu else "cpu")

    net = torchvision.models.resnet50(pretrained=True)
    print(net.fc.in_features)
    net.fc = Identity()

    net = net.to(target)
    net.eval()
    # just in case we forget the no_grad()
    for weight in net.parameters():
        weight.requires_grad = False
    return net
# Build the frozen ResNet-50 feature extractor, requesting the GPU.
rnet = get_ResNet_feature_net(to_cuda=True)
rnet  # notebook echo of the model
# Batch size passed to get_dls when embedding the database.
batch_size = 128
def get_x(r):
    """Return the image location for row ``r``: ``path_images`` joined with ``r['path']``."""
    return path_images / r['path']
#def get_y(r): return r['Category'] # we aren't actually using the category here (see 02_model.ipynb)
def get_fname(r):
    """Return the ``'path'`` entry of row ``r`` (used as the label in ``get_dls``)."""
    return r['path']
def get_dls(data,batch_size, size, device):
    """Create un-shuffled, un-augmented DataLoaders over ``data`` for feature extraction.

    Everything is put in the training split (the index splitter is given an
    empty list). Images are only resized/padded to ``size`` and normalized
    with ``imagenet_stats``. The label of each item is its file path
    (``get_fname``).

    Args:
        data: table passed to ``DataBlock.dataloaders`` (rows need a 'path' entry).
        batch_size: batch size (``bs``).
        size: target size given to ``Resize``.
        device: device handed to the dataloaders.

    Returns:
        The DataLoaders, with shuffling of the training loader switched off.
    """
    pad_resize = Resize(size, method='pad', pad_mode='border')  # border pads white...
    imagenet_norm = Normalize.from_stats(*imagenet_stats)
    block = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        splitter=IndexSplitter([]),
        get_x=get_x,
        get_y=get_fname,
        item_tfms=pad_resize,
        batch_tfms=imagenet_norm,
    )
    loaders = block.dataloaders(data, bs=batch_size, drop_last=False, device=device)
    # since we are just calculating the features for all the data turn off shuffling
    loaders.train.shuffle = False
    return loaders
def get_all_feats(dls,conv_net):
    """Run ``conv_net`` over every batch of ``dls.train`` and tabulate the outputs.

    Args:
        dls: object whose ``train`` attribute yields ``(images, class_indices)``
            batches and whose first loader (``dls[0]``) has a ``vocab`` mapping
            class index -> label (the file path when built with ``get_dls``).
        conv_net: callable mapping an image batch to a batch of feature vectors.

    Returns:
        pandas.DataFrame with one row per item, in iteration order, and the
        columns ``path`` (vocab entry), ``classes`` (class index) and
        ``features`` (one vector per row).
    """
    vects = []
    clss = []
    paths = []
    # The vocab does not change between batches, so look it up once.
    vocab = dls[0].vocab
    for imgs, classes in dls.train:
        with torch.no_grad():
            outs = conv_net(imgs)
        # Iterating a 2-D array yields one row (feature vector) per item.
        vects.extend(outs.cpu().numpy())
        cs = classes.cpu().numpy()
        clss.extend(cs)
        # keep the paths for sanity check
        paths.extend(vocab[c] for c in cs)
    # store all relevant info in a pandas dataframe
    return pd.DataFrame({"path": paths, "classes": clss, "features": vects})
# For every key of IMG_SIZES: build loaders at that image size, embed the whole
# table with rnet, and cache the result as data/resnet50-features_<key>.pkl.
for i,sz in enumerate(IMG_SIZES):
    print(IMG_SIZES[sz])
    dls = get_dls(df,batch_size,IMG_SIZES[sz],device)
    df_f = get_all_feats(dls,rnet)
    # save it
    filename = f"resnet50-features_{sz}"
    df_f.to_pickle(f"data/{filename}.pkl")
# Reload the three cached feature tables (small / medium / large image sizes).
filename = f"resnet50-features_small"
df_sm = pd.read_pickle(f"data/{filename}.pkl")
filename = f"resnet50-features_medium"
df_md = pd.read_pickle(f"data/{filename}.pkl")
filename = f"resnet50-features_large"
df_lg = pd.read_pickle(f"data/{filename}.pkl")
# Join them on 'path'; the first merge suffixes the overlapping columns with
# _sm/_md, and the columns coming from df_lg are renamed to *_lg afterwards.
df_test = pd.merge(df_sm,df_md,how='left',on='path',suffixes=('_sm','_md'))
df_test = pd.merge(df_test,df_lg,how='left',on='path')
df_test = df_test.rename(columns={"classes": "classes_lg", "features": "features_lg"})
# explicitly:
# Attach the features to the original table (left join keeps every row of df).
df2 = pd.merge(df, df_test, how='left', on='path')
# NOTE(review): this filename ends with a trailing underscore — confirm it is intended.
filename = "zappos-50k-resnet50-features_"
df2.to_pickle(f"data/{filename}.pkl")
# Sort by path, renumber the index, and save the sorted table as well.
df2 = df2.sort_values('path', ascending=True)
df2 = df2.reset_index(drop=True)
df2.head(3)
filename = "zappos-50k-resnet50-features_sort_3"
df2.to_pickle(f"data/{filename}.pkl")
# From here on `df` is the sorted table with the feature columns.
df = df2
If we've already calculated everything just load it.
# Choose a query image by its 'path' value and look up its row(s) in df.
query_image = "Shoes/Sneakers and Athletic Shoes/Nike/7716996.288224.jpg"
query_ind = df[df["path"]==query_image].index
#df[df['path']==query_image]
df.loc[query_ind,['path','classes_sm']]
The DataBlock performed a number of processing steps to prepare the images for embedding into the MobileNet_v2 space (1280 vector). Lets confirm that we get the same image and MobileNet_v2 features.
# Apply by hand the same steps get_dls configures: pad-resize, convert to a
# float tensor, then normalize with imagenet_stats.
base_im = PILImage.create(path_images/query_image)
#BUG: pass split_idx=1 to avoid funny business
img = Resize(IMG_SIZE, method='pad', pad_mode='border')(base_im, split_idx=1)
t2 = ToTensor()(img)
t2 = IntToFloatTensor()(t2)
t2 = torchvision.transforms.Normalize(*imagenet_stats)(t2)
t2.shape  # notebook echo of the resulting tensor shape
That seemed to work well. I'll just wrap it in a simple function for now, though a FastAI Pipeline might work the best in the long run.
def load_and_prep_sneaker(image_path,size=IMG_SIZE,to_cuda=False):
    """Load one image and preprocess it for embedding.

    The image is pad-resized to ``size``, converted to a float tensor and
    normalized with ``imagenet_stats``.

    Args:
        image_path: expects a Path(), but a string should work.
        size: target size for ``Resize`` (defaults to ``IMG_SIZE`` as it was
            when this function was defined).
        to_cuda: when True, return the tensor on "cuda:0" if CUDA is
            available; otherwise on the CPU.

    Returns:
        TensorImage ready to unsqueeze and "embed".

    TODO: make this a Pipeline?
    """
    use_gpu = to_cuda and torch.cuda.is_available()
    target = torch.device("cuda:0" if use_gpu else "cpu")

    pad_resize = Resize(size, method='pad', pad_mode='border')
    #BUG: pass split_idx=1 to avoid funny business
    squared = pad_resize(PILImage.create(image_path), split_idx=1)
    as_float = IntToFloatTensor()(ToTensor()(squared))
    normalized = torchvision.transforms.Normalize(*imagenet_stats)(as_float)
    return normalized.to(target)
path_images/query_image  # notebook echo: full path of the query image
def get_convnet_feature(cnet,t_image,to_cuda=False):
    """Embed an image tensor (or a batch of them) with ``cnet``.

    Args:
        cnet: our neutered & prepped network (resnet or MobileNet_v2). It is
            moved to the selected device, which also affects every other
            holder of the same module object.
        t_image: image tensor; anything with fewer than 4 dimensions gets a
            leading batch axis added.
        to_cuda: send to GPU? Default is CPU (``to_cuda=False``). "cuda:0" is
            only used when CUDA is actually available.

    Returns:
        The output of ``cnet`` for the batch, computed without gradient tracking.
    """
    # this is redundant but safe
    use_gpu = to_cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if use_gpu else "cpu")
    cnet = cnet.to(device)
    # Tensor.to() returns a new tensor instead of moving in place, so the
    # result must be kept or the input stays on its original device.
    t_image = t_image.to(device)
    if len(t_image.shape) < 4:
        t_image = t_image.unsqueeze(0)
    with torch.no_grad():
        features = cnet(t_image)
    return features
# A second query image from outside the database (absolute local path).
query_image2 = '/home/ergonyc/Downloads/491212_01.jpg.jpeg'
# Preprocess the in-database query image and embed it.
query_t = load_and_prep_sneaker(path_images/query_image)
#test_feats = get_mnet_feature(mnetv2,query_t)
# get_convnet_feature is the extractor defined in this notebook; the name
# get_resnet_feature used here before is not defined anywhere in view.
test_feats = get_convnet_feature(rnet,query_t)
test_feats.shape  # notebook echo of the feature shape
Now I have the "embeddings" of the database in the mobileNet_v2 output space. I can do a logistic regression on these vectors (should be identical to mapping these 1000 vectors to 4 categories (Part 3)) but I can also use an approximate KNN in this space to run the SneakerFinder tool.
next steps:
- make KNN functions; maybe use an approximate KNN (e.g. Annoy) for speed, or precalculate the neighbors.
- PCA / tSNE / UMAP the space with categories to visualize embedding
- make widgets
Lets find the nearest neighbors as a proxy for "similar"
I'll start with a simple "gut" test, and point out that there really isn't a ground truth to refer to. Remember that the goal of all this is to find some shoes that someone will like, and we are using "similar" as the approximation of human preference.
Let's use our previously calculated sneaker features and check that the k-nearest neighbors in our embedding space feel or look "similar".
Personally, I like Jordans so I chose this as my query_image: 
from sklearn.neighbors import NearestNeighbors
import umap
def get_umap_reducer(latents):
    """Return a ``umap.UMAP`` model (``random_state=666``) fitted on ``latents``."""
    fitted = umap.UMAP(random_state=666)
    fitted.fit(latents)
    return fitted
# Fit one nearest-neighbour index and one UMAP reducer per image size; both
# lists are filled in the iteration order of IMG_SIZES.
num_neighs = 5
knns = []
reducers = []
for i,sz in enumerate(IMG_SIZES):
    print(ABBR[sz])
    print(IMG_SIZES[sz])
    # Column holding the features for this size, e.g. "features_<abbr>".
    features = f"features_{ABBR[sz]}"
    print(features)
    # Stack the per-row feature vectors into one 2-D array.
    db_feats = np.vstack(df[features].values)
    # NOTE(review): the inline comment below mentions "+1", but n_neighbors is
    # num_neighs unchanged — confirm which is intended.
    neighs = NearestNeighbors(n_neighbors=num_neighs) #add plus one in case image exists in database
    neighs.fit(db_feats)
    knns.append(neighs)
    reducer = get_umap_reducer(db_feats)
    reducers.append(reducer)
Lets take a quick look at the neighbors according to our list:
# Query the first fitted index (first entry of IMG_SIZES) with the test features.
neighs = knns[0]
distance, nn_index = neighs.kneighbors(test_feats, return_distance=True)
dist = distance.tolist()[0]
df.columns
# Keep only the path and per-size class columns; `paths` is reused by later cells.
paths = df[['path','classes_sm','classes_md','classes_lg']]
# Rows of the neighbours, selected by position.
neighbors = paths.iloc[nn_index.tolist()[0]].copy()
images = [ PILImage.create(path_images/f) for f in neighbors.path]
#PILImage.create(btn_upload.data[-1])
for im in images:
    display(im.to_thumb(IMG_SIZE,IMG_SIZE))
# img_row = df['path'].values[nn_index[0]]
# img_row = np.insert(img_row, 0, query_image, axis=0)
type(neighs)
def query_neighs(q_feat, myneighs, data, root_path, show = True):
    """Return the images of the nearest neighbours of one query feature.

    Args:
        q_feat: query feature (vector); only the neighbours of its first row
            are used.
        myneighs: fit knn object (must provide ``kneighbors``).
        data: series or df containing "path", positionally aligned with the
            rows the knn object was fitted on.
        root_path: path to image files; joined with each ``path`` value.
        show: when True, also display a thumbnail (``IMG_SIZE`` square) of
            every neighbour.

    Returns:
        List of the neighbours' images, in the order returned by the index.
    """
    # Only the indices are needed, so do not ask for the distances.
    nn_index = myneighs.kneighbors(q_feat, return_distance=False)
    # fix path to the database...
    neighbors = data.iloc[nn_index.tolist()[0]].copy()
    images = [PILImage.create(root_path/f) for f in neighbors.path]
    #PILImage.create(btn_upload.data[-1])
    if show:
        for im in images:
            display(im.to_thumb(IMG_SIZE,IMG_SIZE))
    return images
# Alias for the active feature extractor. get_convnet_feature is the function
# defined in this notebook; get_resnet_feature is not defined anywhere in view.
feature_func = get_convnet_feature
# One list of neighbour images per image size, for the in-database query image.
similar_images = []
for i,sz in enumerate(IMG_SIZES):
    print(ABBR[sz])
    print(IMG_SIZES[sz])
    features = f"features_{ABBR[sz]}"
    print(features)
    # Preprocess the query at this size and search the matching index knns[i].
    query_t = load_and_prep_sneaker(path_images/query_image,IMG_SIZES[sz])
    #query_f = get_convnet_feature(mnetv2,query_t)
    query_f = get_convnet_feature(rnet,query_t)
    similar_images.append( query_neighs(query_f, knns[i], paths, path_images, show=False) )
    # NOTE(review): the original indentation was lost; the query thumbnail is
    # assumed to be shown once per size — confirm.
    im = PILImage.create(path_images/query_image)
    display(im.to_thumb(IMG_SIZES[sz]))
def plot_sneak_neighs(images):
    '''Plot a grid of images, one row per inner list, and save it.

    The first image of every row is titled 'Query'; the image in column
    ``c > 0`` is titled 'Neighbor c'.

    Args:
        images: list of lists; the grid has ``len(images)`` rows and
            ``len(images[0])`` columns, and images[:][0] should be the
            query image.
    return:
        null
        saves image file ('image_search.png') to the working directory
    '''
    n_rows, n_cols = len(images), len(images[0])
    plt.figure(figsize = (20, 20))
    cell = 0
    for image_row in images:
        for col, img in enumerate(image_row):
            cell += 1
            plt.subplot(n_rows, n_cols, cell)
            plt.axis('off')
            plt.imshow(img)
            # First cell of each grid row.
            if (cell - 1) % n_cols == 0:
                plt.title('Query')
            if col > 0:
                plt.title('Neighbor ' + str(col))
    plt.savefig('image_search.png')
    plt.show()
# Plot the neighbour grid for the in-database query image.
plot_sneak_neighs(similar_images)
# Repeat the search for the external query image (query_image2).
similar_images2 = []
for i,sz in enumerate(IMG_SIZES):
    print(ABBR[sz])
    print(IMG_SIZES[sz])
    features = f"features_{ABBR[sz]}"
    print(features)
    query_t = load_and_prep_sneaker(path_images/query_image2,IMG_SIZES[sz])
    #query_f = get_convnet_feature(mnetv2,query_t)
    query_f = get_convnet_feature(rnet,query_t)
    similar_images2.append( query_neighs(query_f, knns[i], paths, path_images, show=False) )
    # NOTE(review): the original indentation was lost; these two lines are
    # assumed to belong to the loop body — confirm.
    im = PILImage.create(path_images/query_image2)
    display(im.to_thumb(IMG_SIZES[sz]))
plot_sneak_neighs(similar_images2)
df.columns
import seaborn as sns
from sklearn.decomposition import PCA
import umap
# first simple PCA
# For every image size: project the stored features to 2-D with PCA and with
# the UMAP reducer fitted earlier (reducers[i]), then plot and save a random
# sample of the points coloured by Category.
pca = PCA(n_components=2)
for i,sz in enumerate(IMG_SIZES):
    print(ABBR[sz])
    print(IMG_SIZES[sz])
    features = f"features_{ABBR[sz]}"
    print(features)
    data = df[['Category',features]].copy()
    db_feats = np.vstack(data[features].values)
    # PCA
    pca_result = pca.fit_transform(db_feats)
    data['pca-one'] = pca_result[:,0]
    data['pca-two'] = pca_result[:,1]
    print(f"Explained variation per principal component (sz{sz}): {pca.explained_variance_ratio_}")
    # Fraction of rows drawn in each scatter plot.
    smpl_fac=.5
    #data=df.reindex(rndperm)
    plt.figure(figsize=(16,10))
    sns.scatterplot(
        x="pca-one",
        y="pca-two",
        hue="Category",
        palette=sns.color_palette("hls", 4),
        data=data.sample(frac=smpl_fac),
        legend="full",
        alpha=0.3
    )
    plt.savefig(f'PCA 2-D sz{sz}')
    plt.show()
    # get the UMAP on deck
    embedding = reducers[i].transform(db_feats)
    data['umap-one'] = embedding[:,0]
    data['umap-two'] = embedding[:,1]
    plt.figure(figsize=(16,10))
    sns.scatterplot(
        x="umap-one",
        y="umap-two",
        hue="Category",
        palette=sns.color_palette("hls", 4),
        data=data.sample(frac=smpl_fac),
        legend="full",
        alpha=0.3
    )
    plt.gca().set_aspect('equal', 'datalim')
    plt.title(f'UMAP projection of mobileNetV2 embedded UT-Zappos data (sz{sz})', fontsize=24)
    # The f-prefix was missing here, so every size overwrote the same file
    # literally named 'UMAP 2-D sz{sz}'.
    plt.savefig(f'UMAP 2-D sz{sz}')
    plt.show()
def get_umap_embedding(latents):
    """Fit UMAP (``random_state=666``) on ``latents`` and return their embedding.

    The transformed training data is asserted to equal the reducer's stored
    ``embedding_`` before it is returned.
    """
    mapper = umap.UMAP(random_state=666)
    mapper.fit(latents)
    projected = mapper.transform(latents)
    assert np.all(projected == mapper.embedding_)
    return projected
# Build lookup tables keyed by image path: path -> feature vector and
# path -> UMAP coordinates.
# NOTE(review): db_feats is whatever the previous cell left behind, presumably
# the features of the last size in IMG_SIZES — confirm.
fn = df.path.values
type(db_feats)
snk2vec = dict(zip(fn,db_feats))
snk2vec[list(snk2vec.keys())[0]]  # notebook echo of the first stored vector
embedding = get_umap_embedding(db_feats)
snk2umap = dict(zip(fn,embedding))
# Widgets for the interactive demo: run button, output area for the images,
# label for the distances, and a file-upload control.
btn_run = widgets.Button(description='Find k-nearest neighbors')
out_pl = widgets.Output()
lbl_neighs = widgets.Label()
btn_upload = widgets.FileUpload()
def _load_image(im):
    """Create an image from ``im`` and pad-resize it to ``IMG_SIZE``.

    input: expects a Path(), but string should work, or a Bytestring
    returns: resized & squared image
    """
    #image = PILImage.create(btn_upload.data[-1])
    pad_resize = Resize(IMG_SIZE, method='pad', pad_mode='border')
    #BUG: pass split_idx=1 to avoid funny business
    return pad_resize(PILImage.create(im), split_idx=1)
def _prep_image(image,to_cuda=False):
    """Convert a squared/resized PIL image into a normalized tensor.

    input: squared/resized PIL image
    output: TensorImage ready to unsqueeze and "embed", on "cuda:0" when
        ``to_cuda`` is True and CUDA is available, otherwise on the CPU.
    TODO: make this a Pipeline?
    """
    use_gpu = to_cuda and torch.cuda.is_available()
    target = torch.device("cuda:0" if use_gpu else "cpu")

    as_float = IntToFloatTensor()(ToTensor()(image))
    normalized = torchvision.transforms.Normalize(*imagenet_stats)(as_float)
    return normalized.to(target)
#img = _load_img(im).flip_lr()
# Network used by the widget callback below.
conv_net = rnet
def on_click_find_similar(change):
    """Button callback: embed the last uploaded image and show its neighbours.

    Reads the most recent upload from ``btn_upload``, embeds it with
    ``conv_net`` on the CPU, queries the module-level ``neighs`` index, then
    shows the query plus the neighbour thumbnails in ``out_pl`` and writes
    the distances to ``lbl_neighs``.

    Args:
        change: event argument supplied by the button; unused.
    """
    im = btn_upload.data[-1]
    img = _load_image(im)
    tensor_im = _prep_image(img,to_cuda=False)
    feats = get_convnet_feature(conv_net, tensor_im )
    distance, nn_index = neighs.kneighbors(feats.numpy(), return_distance=True)
    dist = distance.tolist()[0]
    # fix path to the database...
    neighbors = df.iloc[nn_index.tolist()[0]].copy()
    #neighbors.loc[:,'db_path'] = neighbors.loc[:,'path'].astype(str).copy()
    out_pl.clear_output()
    #with out_pl: display(plot_sneak_neighs(img_row[np.newaxis,:])) # need to convert to pil...
    images = [PILImage.create(path_images/f) for f in neighbors.path]
    #PILImage.create(btn_upload.data[-1])
    with out_pl:
        display(img.to_thumb(200,200))
        for i in images:
            display(i.to_thumb(100,100))
    lbl_neighs.value = f'distances: {dist}'
# Wire the callback to the button and lay the widgets out vertically.
btn_run.on_click(on_click_find_similar)
widgets.VBox([widgets.Label('Find your sneaker!'),
btn_upload, btn_run, out_pl, lbl_neighs])
# import time
# # import matplotlib.pyplot as pltmodel
# import matplotlib.image as mpimg
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D
# import plotly
# import plotly.express as px
# import plotly.figure_factory as FF
import bokeh.plotting as bplt #import figure, show, output_notebook
#from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
import bokeh
# from bokeh.palettes import Spectral10
import umap
#from scipy import spatial #for now just brute force to find neighbors
import scipy
#from scipy.spatial import distance
from io import BytesIO
import base64
########################################3
# BOKEH
#
##########################################3
def init_bokeh_plot(umap_df):
    """Build a Bokeh scatter figure with image hover tooltips for ``umap_df``.

    Args:
        umap_df: table wrapped in a ``ColumnDataSource``; the glyphs read the
            columns 'x', 'y' and 'db', the tooltip reads 'image', 'fname'
            and 'loss'.

    Returns:
        The configured figure (notebook output is enabled as a side effect).
    """
    # HTML template for the hover tooltip; @name refers to a data-source column.
    hover_html = """
<div>
<div>
<img src='@image' style='float: left; margin: 5px 5px 5px 5px'/>
</div>
<div>
<span style='font-size: 14px'>@fname</span>
<span style='font-size: 14px'>@loss</span>
</div>
</div>
"""
    bplt.output_notebook()
    source = bokeh.models.ColumnDataSource(umap_df)
    db_colors = bokeh.models.CategoricalColorMapper(
        factors=["sns","goat"],
        palette=bokeh.palettes.Spectral10,
    )
    fig = bplt.figure(
        title='UMAP projection VAE latent',
        plot_width=1000,
        plot_height=1000,
        tools=('pan, wheel_zoom, reset'),
    )
    fig.add_tools(bokeh.models.HoverTool(tooltips=hover_html))
    fig.circle(
        'x',
        'y',
        source=source,
        color=dict(field='db', transform=db_colors),
        line_alpha=0.6,
        fill_alpha=0.6,
        size=4,
    )
    return fig
def embeddable_image(label):
    """Return the PNG data URI for ``label`` (delegates to ``image_formatter``)."""
    data_uri = image_formatter(label)
    return data_uri
def get_thumbnail(path):
    """Open the image at ``path`` and shrink it in place to fit within 64x64 (LANCZOS)."""
    thumb = Image.open(path)
    max_size = (64, 64)
    thumb.thumbnail(max_size, Image.LANCZOS)
    return thumb
def image_base64(im):
    """Return ``im`` encoded as a base64 PNG string.

    Args:
        im: either a ``str`` path (a thumbnail is loaded via
            ``get_thumbnail``) or an object with a ``save(buffer, 'png')``
            method.
    """
    picture = get_thumbnail(im) if isinstance(im, str) else im
    with BytesIO() as png_bytes:
        picture.save(png_bytes, 'png')
        encoded = base64.b64encode(png_bytes.getvalue())
    return encoded.decode()
def image_formatter(im):
    """Return ``im`` as a ``data:image/png;base64,...`` URI string."""
    payload = image_base64(im)
    return f"data:image/png;base64,{payload}"
# do we need it loaded... it might be fast enough??
#@st.cache
def load_UMAP_data():
    """Load ``snk2umap.pkl`` for the model/parameter set named by the globals.

    The directory is built from the module-level ``model_name`` and
    ``params`` ('x_dim', 'z_dim', 'kl_weight'); the pickle is read with
    ``ut.load_pickle``.
    """
    base_dir = f"data/{model_name}-X{params['x_dim'][0]}-Z{params['z_dim']}"
    run_dir = os.path.join(base_dir, f"kl_weight{int(params['kl_weight']):03d}")
    pickle_path = os.path.join(run_dir, "snk2umap.pkl")
    return ut.load_pickle(pickle_path)
def load_latent_data():
    """Assemble the table and lookup structures used by the Bokeh UMAP plot.

    Reads the module-level ``snk2vec`` and ``snk2loss`` mappings and the UMAP
    coordinates from ``load_UMAP_data()``.

    NOTE(review): ``snk2loss``, ``model_name``, ``params`` and ``ut`` are not
    defined in the visible part of this file — presumably carried over from
    an earlier VAE project; confirm before use.

    Returns:
        Tuple ``(umap_df, snk2vec, latents, labels, vecs, vec_tree, mids)``.
    """
    # NOTE(review): data_dir is computed but never used in this function.
    data_dir = f"data/{model_name}-X{params['x_dim'][0]}-Z{params['z_dim']}"
    snk2umap = load_UMAP_data()
    # load df (filenames and latents...)
    mids = list(snk2vec.keys())
    vecs = np.array([snk2vec[m] for m in mids])
    # KD-tree over the feature vectors for neighbour queries.
    vec_tree = scipy.spatial.KDTree(vecs)
    latents = np.array(list(snk2vec.values()))
    losses = np.array(list(snk2loss.values()))
    labels = np.array(mids)
    labels2 = np.array(list(snk2umap.keys()))
    embedding = np.array(list(snk2umap.values()))
    # Both mappings must list the same keys in the same order.
    assert(np.all(labels == labels2))
    umap_df = pd.DataFrame(embedding, columns=('x', 'y'))
    # assumes the keys are bytes (they are .decode()d here) — TODO confirm
    umap_df['digit'] = [str(x.decode()) for x in labels]
    umap_df['image'] = umap_df.digit.map(lambda f: embeddable_image(f))
    # 'fname' combines the third-from-last and the last path components;
    # 'db' is the third-from-last component alone.
    umap_df['fname'] = umap_df.digit.map(lambda x: f"{x.split('/')[-3]} {x.split('/')[-1]}")
    umap_df['db'] = umap_df.digit.map(lambda x: f"{x.split('/')[-3]}")
    umap_df['loss'] = [f"{x:.1f}" for x in losses]
    return umap_df,snk2vec,latents, labels, vecs,vec_tree,mids
#%%
# pca_result = pca.fit_transform(df['feats'].values.tolist())
# df['pca-one'] = pca_result[:,0]
# df['pca-two'] = pca_result[:,1]
# df['pca-three'] = pca_result[:,2]
# print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))
# #data=df.sample(frac=1.0)
# #data=df.reindex(rndperm)
# data = df
# #df_subset = df
# time_start = time.time()
# tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
# tsne_results = tsne.fit_transform(db_feats)
# print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
# df['tsne-2d-one'] = tsne_results[:,0]
# df['tsne-2d-two'] = tsne_results[:,1]
# plt.figure(figsize=(16,10))
# sns.scatterplot(
# x="tsne-2d-one", y="tsne-2d-two",
# hue="CategoryDir",
# palette=sns.color_palette("hls", 4),
# data=df,
# legend="full",
# alpha=0.3
#)
df.Category=='train'  # notebook echo: boolean mask of rows whose Category equals 'train'
# import matplotlib.image as mpimg
# import random
# from PIL import Image
# import requests
# from io import BytesIO
from sklearn.metrics import confusion_matrix
from seaborn import heatmap
from sklearn.linear_model import LogisticRegression
#Display Confusion Matrix
# Fit a logistic regression on the stored features and report confusion
# matrices on the rows marked 'test'.
# NOTE(review): this cell expects 't_t_v' and 'features' columns; the table
# built earlier in this file has features_sm/_md/_lg instead — confirm which
# table this was written for.
X_test = np.vstack(df[df.t_t_v=='test']['features'])
y_test = np.vstack(df[df.t_t_v=='test']['Category'])
X_train = np.vstack(df[df.t_t_v=='train']['features'])
y_train = np.vstack(df[df.t_t_v=='train']['Category'])
clf_log = LogisticRegression(C = 1, multi_class='ovr', max_iter=2000, solver='lbfgs')
clf_log.fit(X_train, y_train)
log_score = clf_log.score(X_test, y_test)
log_ypred = clf_log.predict(X_test)
log_confusion_matrix = confusion_matrix(y_test, log_ypred)
print(log_confusion_matrix)
# Heat-map of the raw confusion matrix, saved to log_Matrix.png.
disp = heatmap(log_confusion_matrix, annot=True, linewidths=0.5, cmap='Blues')
plt.savefig('log_Matrix.png')
plt.figure(figsize=(16,16))
# Plot non-normalized confusion matrix
titles_options = [("Confusion matrix, without normalization", None),
("Normalized confusion matrix", 'true')]
class_names = df.Category.unique()
from sklearn.metrics import plot_confusion_matrix
# One plot per (title, normalize) option.
for title, normalize in titles_options:
    disp = plot_confusion_matrix(clf_log, X_test, y_test,
                                 display_labels=class_names,
                                 cmap=plt.cm.Blues,
                                 normalize=normalize)
    disp.ax_.set_title(title)
    print(title)
    print(disp.confusion_matrix)
# NOTE(review): the original indentation was lost; the save is assumed to run
# once, after the loop — confirm.
plt.savefig('log_Matrix2.png')
def get_x(r):
    """Return the image location for row ``r``: ``path_images`` joined with ``r['path']``."""
    return path_images / r['path']
def get_y(r):
    """Return the ``'Category'`` entry of row ``r`` (the classification label)."""
    return r['Category']
def splitter(df):
    """Return ``(train, valid)`` index lists from the boolean 'train' and 'validate' columns."""
    def _flagged(column):
        # Index labels of the rows where the boolean column is True.
        return df.index[df[column]].tolist()

    return _flagged('train'), _flagged('validate')
# splitter=RandomSplitter(valid_pct=0.3,seed=42),
# get_x=get_x,
# get_y=get_y,
# #item_tfms=Resize(224)
# #item_tfms = RandomResizedCrop(224,min_scale=0.95)
# )
# dls = dblock.dataloaders(df)
doc(DataBlock)
imagenet_stats
# NOTE(review): batch_tfms is assigned here but the DataBlock below passes
# `tfms` instead, so this Normalize transform is not used in this cell.
batch_tfms=Normalize.from_stats(*imagenet_stats)
# Augmentation set: horizontal flips, small rotation/zoom/warp; p_lighting=0.0.
tfms = aug_transforms(mult=1.0,
do_flip=True,
flip_vert=False,
max_rotate=5.0,
min_zoom=1.0,
max_zoom=1.05,
max_lighting=0.1,
max_warp=0.05,
p_affine=0.75,
p_lighting=0.0,
xtra_tfms=None,
size=None,
mode='bilinear',
pad_mode='reflection',
align_corners=True,
batch=False,
min_scale=1.0)
# put everythign in train, and don't do any augmentation since we are just going
# resize to 160
# NOTE(review): the two comment lines above look stale — this block uses the
# train/validate `splitter` and applies the augmentations in `tfms`.
dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
splitter=splitter,
get_x=get_x,
get_y=get_y,
item_tfms=Resize(160,method='pad', pad_mode='border'),
batch_tfms=tfms) # border pads white...
dls = dblock.dataloaders(df,bs=64,drop_last=False)
# Peek at the second (name, module) pair of a fresh MobileNetV2. `items` is a
# method, so it must be called and materialized before it can be indexed.
list(models.mobilenet_v2()._modules.items())[1]
# Parameter-group splitter handed to cnn_learner for MobileNetV2.
mobilenet_split = lambda m: (m[0][0][10], m[1])
learn = cnn_learner(dls, models.mobilenet_v2, splitter=mobilenet_split,cut=-1, pretrained=True,metrics=error_rate)
#learn = cnn_learner(dls, model_conv, splitter=mobilenet_split,cut=-1, pretrained=True)
lr_min,lr_steep = learn.lr_find()
lr_min, lr_steep
doc(learn.fine_tune)
learn.predict(dls.dataset[10][0])
# NOTE(review): fastai's Learner.fine_tune takes the number of epochs as a
# required argument, so this call is expected to fail as written — supply the
# intended epoch count.
learn.fine_tune()
learn.fit_one_cycle(6, lr_max=1e-5)
learn.recorder.plot_loss()
# Pretrained MobileNetV2 with every existing parameter frozen.
model_conv = torchvision.models.mobilenet_v2(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False
# Parameters of newly constructed modules have requires_grad=True by default
# just read this off: model_conv.classifier
num_categories = 4
# Replace the classifier sub-module '1' with a fresh linear layer that has
# num_categories outputs.
num_ftrs = model_conv.classifier._modules['1'].in_features
model_conv.classifier._modules['1'] = nn.Linear(num_ftrs, num_categories)
def trns_mobilenet_v2():
    """Return a pretrained MobileNetV2 set up for transfer learning.

    All pretrained parameters are frozen; classifier sub-module '1' is
    replaced by a new ``nn.Linear`` with ``num_categories`` outputs
    (``num_categories`` is read from module scope).
    """
    net = torchvision.models.mobilenet_v2(pretrained=True)
    for weight in net.parameters():
        weight.requires_grad = False
    # Parameters of newly constructed modules have requires_grad=True by default
    # just read this off: model_conv.classifier
    head = net.classifier._modules
    head['1'] = nn.Linear(head['1'].in_features, num_categories)
    return net
# Randomly initialized MobileNetV2 (no pretrained flag).
mnetV2 = torchvision.models.mobilenet_v2()
import torchvision
from torchvision import models
# def _mobilenetv2_split(m:nn.Module):
# return (m[0][0][10],m[1])(m:nn.Module): return (m[0][0][10],m[1])
mobilenet_split = lambda m: (m[0][0][10], m[1])
#arch = torchvision.models.mobilenet_v2
# Pretrained MobileNetV2, moved to the GPU when one is available.
model_conv = models.mobilenet_v2(pretrained=True)
#learn = cnn_learner(dls, models.mobilenet_v2, cut=-1, pretrained=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_conv = model_conv.to(device)
# Reload the table from its JSON export. The context manager closes the file
# handle, which the bare open() call used here before never did.
with open(f"{filename}.json") as json_file:
    database_json = json.load(json_file)
df = pd.DataFrame(database_json)
# Scratch checks of fastai's model-building helpers (has_pool_type,
# create_body, create_cnn_model) — these look copied from fastai's own
# documentation/tests; confirm they are still needed here.
m = nn.Sequential(nn.AdaptiveAvgPool2d(5), nn.Linear(2,3), nn.Conv2d(2,3,1), nn.MaxPool3d(5))
assert has_pool_type(m)
test_eq([has_pool_type(m_) for m_ in m.children()], [True,False,False,True])
# Tiny 4-layer stand-in "architecture" taking a `pretrained` argument.
tst = lambda pretrained : nn.Sequential(nn.Conv2d(3,5,3), nn.BatchNorm2d(5), nn.AvgPool2d(1), nn.Linear(3,4))
m = create_body(tst)
test_eq(len(m), 2)
m = create_body(tst, cut=3)
test_eq(len(m), 3)
m = create_body(tst, cut=noop)
test_eq(len(m), 4)
# The first layer's in_channels should follow n_in.
for n in range(1,5):
    m = create_body(tst, n_in=n)
    test_eq(_get_first_layer(m)[0].in_channels, n)
tst = create_cnn_model(models.resnet18, 10, True)
tst = create_cnn_model(models.resnet18, 10, True, n_in=1)
# fastai PETS example: label each image from its file name via a regex.
# NOTE(review): this rebinds `dls`, replacing any sneaker DataLoaders held
# under that name.
pets = DataBlock(blocks=(ImageBlock, CategoryBlock),
get_items=get_image_files,
splitter=RandomSplitter(),
get_y=RegexLabeller(pat = r'/([^/]+)_\d+.jpg$'))
dls = pets.dataloaders(untar_data(URLs.PETS)/"images", item_tfms=RandomResizedCrop(300, min_scale=0.5), bs=64,
batch_tfms=[*aug_transforms(size=224)])
import torchvision
from torchvision import models
# def _mobilenetv2_split(m:nn.Module):
# return (m[0][0][10],m[1])(m:nn.Module): return (m[0][0][10],m[1])
#mobilenet_split = lambda m: (m[0][0][10], m[1])
#arch = torchvision.models.mobilenet_v2
model_conv = models.mobilenet_v2(pretrained=True)
#learn = cnn_learner(dls, models.mobilenet_v2, cut=-1, pretrained=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#model_conv = model_conv.to(device)
model_conv
# Full path of every image listed in df.
files = df.path
full_file_path = [(path_images/file) for file in files]
img_size = 192
# NOTE(review): process_image_files is not defined in the visible part of
# this file; it presumably returns a table with 'CID' and 'image_features'.
df_out = process_image_files(df.index,df.CID,full_file_path, model_conv,img_size)
x = df_out['image_features'].values
x[0][0][:10]
# Attach the computed features to df by CID.
df = pd.merge(df, df_out, how='left', on='CID')# left_on=['CID'], right_on = ['CID'])
# File name includes the model's class name.
filename = f"zappos-50k-{model_conv.__class__.__name__}-features"
#df.to_csv(f"{filename}.csv")
df.to_pickle(f"{filename}.pkl")
#
# Round-trip through the pickle and also export as JSON.
df = pd.read_pickle(f"{filename}.pkl")
df.to_json(f"{filename}.json")
pytorch ß-VAE
This is a cool reference for a fastai VAE https://medium.com/@dhuynh95/an-introduction-to-unsupervised-learning-with-fastai-a6dbd78eca2b
# Grab one validation batch and push it through the network as a smoke test.
x,y = dls.valid.one_batch()
num_categories = len(dls.vocab)
x.shape,y, num_categories
#learn = cnn_learner(dls, models.mobilenet_v2, cut=-1, pretrained=True)
# convert each image into a rank 1000 vector...
x_hat = model_conv(x)
# lr_min,lr_steep = learn.lr_find()
# lr_min, lr_steep
x_hat.shape
# NOTE(review): `arch` is only assigned further down in this file — this line
# relies on notebook execution order.
outputs = arch(x)
ConvNet as fixed feature extractor
Here, we need to freeze all the network except the final layer. We need
to set requires_grad == False to freeze the parameters so that the
gradients are not computed in backward().
You can read more about this in the documentation
here <https://pytorch.org/docs/notes/autograd.html#excluding-subgraphs-from-backward>__.
Replace the classifier module output with something that predicts 'num_categories'. The mobilenet_v2 uses a classifier rather than "fc"(fully connected) final module...
# Pretrained MobileNetV2 with every existing parameter frozen.
model_conv = torchvision.models.mobilenet_v2(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False
# Parameters of newly constructed modules have requires_grad=True by default
# just read this off: model_conv.classifier
# Replace classifier sub-module '1' with a new linear layer of
# num_categories outputs.
num_ftrs = model_conv.classifier._modules['1'].in_features
model_conv.classifier._modules['1'] = nn.Linear(num_ftrs, num_categories)
def trns_mobilenet_v2():
    """Return a pretrained MobileNetV2 set up for transfer learning.

    All pretrained parameters are frozen; classifier sub-module '1' is
    replaced by a new ``nn.Linear`` with ``num_categories`` outputs
    (``num_categories`` is read from module scope).
    """
    net = torchvision.models.mobilenet_v2(pretrained=True)
    for weight in net.parameters():
        weight.requires_grad = False
    # Parameters of newly constructed modules have requires_grad=True by default
    # just read this off: model_conv.classifier
    head = net.classifier._modules
    head['1'] = nn.Linear(head['1'].in_features, num_categories)
    return net
# fastai learner on MobileNetV2 plus a learning-rate search.
mobilenet_split = lambda m: (m[0][0][10], m[1])
learn = cnn_learner(dls, models.mobilenet_v2, splitter=mobilenet_split,cut=-1, pretrained=True)
lr_min,lr_steep = learn.lr_find()
lr_min, lr_steep
# Plain-PyTorch training setup for model_conv.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_conv = model_conv.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.classifier.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)
Train and evaluate ^^^^^^^^^^^^^^^^^^
On CPU this will take about half the time compared to previous scenario. This is expected as gradients don't need to be computed for most of the network. However, forward does need to be computed.
# Train only the new classifier head for 25 epochs.
# NOTE(review): train_model is defined further down in this file — this call
# relies on notebook execution order.
model_conv = train_model(model_conv, criterion, optimizer_conv,
exp_lr_scheduler, num_epochs=25)
# Just normalization for validation
# 'train' uses a random resized crop and horizontal flip; 'val' uses a fixed
# resize and center crop. Both normalize with the same mean/std triplets.
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
# NOTE(review): data_dir is empty, so the 'train' and 'val' folders are looked
# up relative to the current working directory — confirm.
data_dir = ''
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
shuffle=True, num_workers=4)
for x in ['train', 'val']}
# Number of items per split; train_model divides its running totals by these.
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def imshow(inp, title=None):
    """Imshow for Tensor.

    Moves the first axis of ``inp`` to the end, applies ``std * x + mean``
    per channel, clips to [0, 1] and shows the result, optionally with a title.
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    picture = inp.numpy().transpose((1, 2, 0))
    picture = np.clip(std * picture + mean, 0, 1)
    plt.imshow(picture)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
# Make a grid from batch
out = torchvision.utils.make_grid(inputs)
# Show the grid, titled with the class name of every image in the batch.
imshow(out, title=[class_names[x] for x in classes])
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase and a 'val' phase over the module-level
    ``dataloaders``; per-phase loss and accuracy (normalized by
    ``dataset_sizes``) are printed. The scheduler is stepped once after every
    training phase.

    Args:
        model: network to train (already on ``device``).
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped after every training batch.
        scheduler: learning-rate scheduler.
        num_epochs: number of epochs to run.

    Returns:
        ``model``, with the state dict from the best-accuracy 'val' phase.
    """
    started = time.time()

    best_state = copy.deepcopy(model.state_dict())
    top_acc = 0.0
    last_epoch = num_epochs - 1

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, last_epoch))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            training = phase == 'train'
            # train(True) is training mode, train(False) is evaluate mode
            model.train(training)

            loss_total = 0.0
            hits = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # track history only while training
                with torch.set_grad_enabled(training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if training:
                        loss.backward()
                        optimizer.step()

                # statistics (loss.item() is scaled by the batch size)
                loss_total += loss.item() * inputs.size(0)
                hits += torch.sum(preds == labels.data)

            if training:
                scheduler.step()

            n_items = dataset_sizes[phase]
            epoch_loss = loss_total / n_items
            epoch_acc = hits.double() / n_items

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > top_acc:
                top_acc = epoch_acc
                best_state = copy.deepcopy(model.state_dict())

        print()

    elapsed = time.time() - started
    print('Training complete in {:.0f}m {:.0f}s'.format(
        elapsed // 60, elapsed % 60))
    print('Best val Acc: {:4f}'.format(top_acc))

    # load best model weights
    model.load_state_dict(best_state)
    return model
def visualize_model(model, num_images=6):
    """Plot predictions for the first ``num_images`` validation samples.

    The model is put in eval mode while predicting and is switched back to
    whatever mode it was in on entry before the function returns.

    Args:
        model: network whose predictions are displayed.
        num_images (int): how many samples to draw, laid out two per row.
    """
    mode_on_entry = model.training
    model.eval()
    shown = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            preds = torch.max(outputs, 1)[1]

            for j in range(inputs.size(0)):
                shown += 1
                ax = plt.subplot(num_images // 2, 2, shown)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                # Stop as soon as the requested number of panels is drawn.
                if shown == num_images:
                    model.train(mode=mode_on_entry)
                    return
        # Ran out of validation data before reaching num_images.
        model.train(mode=mode_on_entry)
# Fine-tune an ImageNet-pretrained ResNet-18 on the classes in `class_names`.
model_ft = models.resnet18(pretrained=True)
# NOTE(review): `arch` is not used anywhere in this cell -- confirm whether it
# is needed later before removing it.
arch = torchvision.models.mobilenet_v2(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Size the new head from the dataset instead of hard-coding 2 outputs, so the
# layer always matches the label range the loss will see.
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=25)

print(os.getcwd())
df.head()
import matplotlib.pyplot as plt
#import seaborn as sns
import os
from glob import glob
from datetime import datetime
import time
import numpy as np
import pandas as pd
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
#plt.ion() # interactive mode
from fastai.vision.all import *
import PIL
from PIL import Image
from os import listdir
from os.path import isfile, join
from sklearn.decomposition import PCA
def process_image_files(path, label, model):
    """Extract image features from every image file stored in ``path``.

    Args:
        path (str): directory holding the images (with or without a
            trailing separator)
        label (int): classification label assigned to every file found
        model (obj): feature-extraction model handed to ``ImageFeatures``

    Return:
        df (dataframe): pandas dataframe with columns ``labels``,
            ``image_features``, ``image_file_path`` and ``file_names``
    """
    img_height = 224
    img_width = 224

    # get all file names in directory, get full file path and assign labels
    file_names = [f for f in listdir(path)
                  if isfile(join(path, f)) and f != '.DS_Store']
    # join() (rather than string concatenation) so the paths match the ones
    # tested by isfile() above even when `path` has no trailing separator
    full_file_path = [join(path, f) for f in file_names]
    labels = np.ones(len(file_names), dtype=int) * label

    # Run each file through the model. Width/height are passed by keyword so
    # they cannot be silently swapped against the ImageFeatures signature.
    image_features = [
        ImageFeatures(file, pic_width=img_width, pic_height=img_height,
                      model=model).img_features
        for file in tqdm_notebook(full_file_path)
    ]

    # store all relevant info in a pandas dataframe
    df = pd.DataFrame()
    df['labels'] = labels
    df['image_features'] = image_features
    df['image_file_path'] = full_file_path
    df['file_names'] = file_names

    return df
# eventually move this into a module
class ImageFeatures:
    """Load one image file and run it through a feature-extraction model.

    Attributes:
        model (obj): model used to extract features
        img_features (array): output of ``model.predict`` for the image
    """

    def __init__(self, img_path, pic_width, pic_height, model):
        """Load ``img_path`` at the requested size and compute its features."""
        self.model = model
        img_vec = self.image_process(img_path, pic_width, pic_height)
        self.feature_extraction(img_vec, model)

    def image_process(self, img_path, pic_width, pic_height):
        """Load the image resized to the requested size and return it as an array."""
        # Keras' load_img takes target_size as (height, width), not (width, height).
        img_data = image.load_img(img_path, target_size=(pic_height, pic_width))
        image_vector = image.img_to_array(img_data)
        return image_vector

    def feature_extraction(self, img_vector, model):
        """Preprocess ``img_vector`` and store the model output in ``img_features``."""
        # Work on a copy: otherwise the preprocess call overwrites img_vector.
        img_d = img_vector.copy()
        img_d = np.expand_dims(img_d, axis=0)  # add the leading batch axis
        # NOTE(review): two preprocessing calls are applied back to back --
        # confirm that applying both is intended.
        img_d = preprocess_input(img_d)
        img_d = imagenet_utils.preprocess_input(img_d)
        self.img_features = model.predict(img_d)
from tqdm import tqdm_notebook
def add_features_to_database(json_filepath, image_filepath, cnn_model):
    ''' Compute CNN features for every image listed in a JSON database.

    The JSON file is read into a dataframe, each entry of its ``path`` column
    is run through ``cnn_model`` via ``ImageFeatures``, and the resulting
    feature table is written next to the input as ``<name>_processed.json``.

    Args:
        json_filepath (str): path to the JSON database; it must provide a
            ``path`` column
        image_filepath (str): prefix prepended (by plain string
            concatenation) to each ``path`` entry
        cnn_model (obj): feature-extraction model handed to ``ImageFeatures``
    Returns:
        new_df (df): pandas dataframe with a single ``image_features`` column
    '''
    # initialize image size
    pwidth = 224
    pheight = 224

    # Load in data (pass names to json DBs around rather than DataFrames).
    # The context manager closes the file instead of leaking the handle.
    with open(json_filepath) as json_fh:
        image_df = pd.DataFrame(json.load(json_fh))

    # Convert images to RGB arrays and extract features
    cnn_features = []
    for rel_path in tqdm_notebook(image_df['path']):
        print(rel_path)
        print(type(rel_path))
        img_path = image_filepath + rel_path
        img_obj = ImageFeatures(img_path, pwidth, pheight, cnn_model)
        # predict() output has a leading batch axis; keep the single row
        cnn_features.append(img_obj.img_features[0, :])

    # Create new dataframe with relevant features
    new_df = pd.DataFrame()
    new_df['image_features'] = cnn_features

    save_filepath = json_filepath.split('.json')[0] + '_processed.json'
    new_df.to_json(save_filepath, orient='records')

    return new_df
# Start from the dataframe already in memory; it is replaced just below by
# the contents of the summary JSON.
image_df = df
pwidth = 224
pheight = 224

json_filepath = summary_file
# Load in data (pass names to json DBs around rather than DataFrames).
# The context manager closes the file instead of leaking the handle.
with open(json_filepath) as json_fh:
    json_file = json.load(json_fh)
image_df = pd.DataFrame(json_file)

#DATAPATH = "/Users/ergonyc/Projects/DATABASE/"
# #from utils import train_val_generator
# NOTE(review): DATAPATH must already be defined when this cell runs (its
# assignment above is commented out).
data_path = os.path.join(DATAPATH, 'xdata/')  # path of the data (was `= =`, a syntax error)
train_path = './train'
test_path = './test'

TARGET_SZ = 224
input_shape = (TARGET_SZ, TARGET_SZ, 3)  # (img_width, img_height, img_channel)
#batch_size = 32

# ImageNet-pretrained MobileNetV2 without its classifier top; with
# pooling='avg' the network output is the pooled feature vector.
cnn_model = MobileNetV2(weights='imagenet',
                        include_top=False,
                        input_shape=input_shape,
                        pooling='avg')
model_name = 'MobileNetV2'

# with graph.as_default():
#     img_features = network_model.predict(img_vector)

# Loop over paths to get updated database
imagepath = './data/'
df_features = add_features_to_database(summary_file, imagepath, cnn_model)
# Load the full summary database.
current_data = "summary_Jun-28-2020"
summary_file = f"{current_data}.json"
with open(os.path.join(DATAPATH, summary_file)) as json_fh:
    json_file = json.load(json_fh)
df = pd.DataFrame(json_file)

# Load the train/test/validation summary; this replaces `df` from above.
current_data = "train_test_val_Jun-28-2020"
tt_summary_file = f"{current_data}.json"
with open(os.path.join(DATAPATH, tt_summary_file)) as json_fh:
    json_file = json.load(json_fh)
df = pd.DataFrame(json_file)

# Relative paths below are resolved from the parent directory.
os.chdir('..')

# Build one ImageFeatures object per image listed in image_df.
# NOTE(review): `image_filepath` is not assigned in the visible part of this
# script (only `imagepath` is) -- confirm it is defined before this runs.
img_objects = []
for rel_path in tqdm_notebook(image_df['path']):
    print(rel_path)
    print(type(rel_path))
    img_path = image_filepath + rel_path
    img_obj = ImageFeatures(img_path, pwidth, pheight, cnn_model)
    img_objects.append(img_obj)
import os
from glob import iglob
from os.path import join,basename
import shutil
import random
# UPDATE - JAH, put all data into DATABASE
# Source directory of the images and the destination roots used for the
# full local copy and for the train / test / validation folders.
data_path = './sneaks/data/' # path of the data
train_path = './train'
test_path = './test'
ldata_path = './data'
val_path = './validate'
# Split flags: every row starts out flagged as training data only.
df.loc[:,'train'] = 1
df.loc[:,'test'] = 0
df.loc[:,'validate'] = 0
# Peek at the first few path_and_file entries.
df.path_and_file.head(5)
def _copy_images_by_category(dest_root):
    """Copy every image listed in ``df`` into ``dest_root/<CategoryDir>/``."""
    for idx in df.index:
        save_path = join(dest_root, df.loc[idx, 'CategoryDir'])
        os.makedirs(save_path, exist_ok=True)
        img = join(data_path, df.loc[idx, 'path'])
        shutil.copy2(img, save_path)


def _move_random_fraction(src_root, dest_root, fraction, from_col, to_col):
    """Move a random ``fraction`` of each category folder to ``dest_root``.

    For every moved file the matching ``df`` rows (matched on ``Filename``)
    get ``from_col`` cleared and ``to_col`` set.
    """
    for category_dir in iglob(join(src_root, '*')):
        save_path = join(dest_root, basename(category_dir))
        os.makedirs(save_path, exist_ok=True)
        total_imgs = list(iglob(join(category_dir, '*')))
        rand_amt = fraction * len(total_imgs)
        print(rand_amt)
        # Sample WITHOUT replacement: repeated random.choice() calls with a
        # "skip duplicates" check moved fewer files than the stated fraction.
        for img in random.sample(total_imgs, int(rand_amt)):
            moved_rows = df.Filename == basename(img)
            df.loc[moved_rows, from_col] = 0
            df.loc[moved_rows, to_col] = 1
            shutil.move(img, save_path)


# NOTE(review): the nesting is reconstructed -- all four stages are assumed to
# sit under this existence check, so re-running never re-splits data that is
# already on disk. Confirm against the original notebook cell.
if not os.path.exists(ldata_path):
    # create full set
    print('_'*30)
    print('Creating full local category set....')
    print('_'*30)
    _copy_images_by_category(ldata_path)

    print('_'*30)
    print('Creating full train set....')
    print('_'*30)
    _copy_images_by_category(train_path)

    print('_'*30)
    print('Creating test set....')
    print('_'*30)
    # select 24% of data from each category as testing + validation set
    _move_random_fraction(train_path, test_path, 2 * 0.12, 'train', 'test')

    print('_'*30)
    print('Creating validation set....')
    print('_'*30)
    # select 50% of the held-out data from each category to split evenly
    # between test and validation. The flag column is 'validate' -- the one
    # initialised alongside 'train' and 'test' -- not a new 'validation' column.
    _move_random_fraction(test_path, val_path, 0.5, 'test', 'validate')
# # create valisation set
# print('_'*30)
# print('Creating validation set....')
# print('_'*30)
# #instead of looking at the files, lets just use the database
# for file in iglob(join(train_path,'*')):
# #for file in df.path:
# save_path = join(val_path, basename(file))
# if not os.path.exists(save_path):
# os.makedirs(save_path)
# total_imgs = [x for x in iglob(join(file,'*'))]
# rand_amt = 0.12 * len(total_imgs) # select 12% of data from each category as testing set
# print(rand_amt)
# save the simple file (pickle + JSON). The identical save block used to be
# repeated twice back to back; a single pass writes the same two files.
summary_file ='train_test_val_Feb-06-2020.pkl'
df.to_pickle(summary_file)
summary_file ='train_test_val_Feb-06-2020.json'
df.to_json(summary_file)

# Quick interactive checks.
sum(df.train)
# NOTE(review): `chg` is not defined in the visible part of this script.
chg.unique()
sum(df['Gender']=='Men;Boys;Women;Girls') # ignore
# Derive boolean category flags.
df.loc[:,'Sneakers'] = df['SubCategory.Sneakers.and.Athletic.Shoes'] == 1
df.loc[:,'Boots'] = (df.Category1 == 'Boots')
df.loc[:,'Shoes'] = (df.Category1 == 'Shoes') & (~df.Sneakers)  # shoes that are not sneakers

############
# Keep adult sizes only; every other Gender value (children's, mixed, NaN)
# fails all three comparisons below and is dropped.
mens = df['Gender'] == 'Men'
womens = df['Gender'] == 'Women'
etc = df['Gender'] =='Men;Women'
df.loc[:, 'Adult'] = mens | womens | etc

#keep Adult, Sneakers, Boots, Shoes
keep_rows = (df.Sneakers | df.Boots | df.Shoes) & (df.Adult)

keep_columns = ['CID',
                'Category',
                'SubCategory',
                'Gender',
                'path',
                'Sneakers',
                'Boots',
                'Shoes',
                'path_and_file',
                'Brand',
                'Filename',
                'image_features']

# keep_rows is a boolean mask, so index with it directly. Intersecting the
# index with the mask compared index LABELS against True/False values and
# did not select the flagged rows.
df = df.loc[keep_rows, :]
# filter(items=...) keeps only the listed columns that are actually present.
df = df.filter(items=keep_columns)

df.tail()
df
# save the simple file
# Write the current dataframe as pickle and as JSON, with today's date
# embedded in the file name.
summary_file = f'./categories_with_features_{today:%b-%d-%Y}.pkl'
df.to_pickle(summary_file)
summary_file = f'./categories_with_features_{today:%b-%d-%Y}.json'
df.to_json(summary_file)
# Reload a fixed-date snapshot: the split summary goes to df2, the feature
# table replaces df. The writes for these file names are commented out.
summary_file ='train_test_val_Jun-25-2020.pkl'
#df.to_pickle(summary_file)
json_summary_file ='train_test_val_Jun-25-2020.json'
#df.to_json(summary_file)
df2 = pd.read_json(json_summary_file)
df = pd.read_json('categories_with_features_Jun-25-2020.json')
df.path_and_file
# One category directory per row: default 'Shoes', overridden for boots and
# then for sneakers (the later assignment wins where both flags are set).
df.loc[:,'CategoryDir'] = 'Shoes'
df.loc[df.Boots==1,'CategoryDir'] = 'Boots'
df.loc[df.Sneakers==1,'CategoryDir'] = 'Sneakers'
# save the simple file
# Same reload as above, but from the earlier February snapshot; again the
# writes are commented out and only the reads run.
summary_file ='train_test_val_Feb-06-2020.pkl'
#df.to_pickle(summary_file)
json_summary_file ='train_test_val_Feb-06-2020.json'
#df.to_json(summary_file)
df2 = pd.read_json(json_summary_file)
df = pd.read_json('categories_with_features_Feb-05-2020.json')
Transfer learning with a pretrained ConvNet
Let's try two ways to customize a pretrained model:
Feature Extraction: Use the representations learned by a previous network to extract meaningful features from new samples. Simply add a new classifier, which will be trained from scratch, on top of the pretrained model so that you can repurpose the feature maps learned previously for the dataset. We will not (re)train the entire model. The base convolutional network already contains features that are generically useful for classifying pictures. However, the final, classification part of the pretrained model is specific to the original classification task, and subsequently specific to the set of classes on which the model was trained.
Fine-Tuning: Unfreeze a few of the top layers of a frozen model base and jointly train both the newly-added classifier layers and the last layers of the base model. This allows us to "fine-tune" the higher-order feature representations in the base model in order to make them more relevant for the specific task.
General machine learning workflow:
- Examine and understand the data
- Build an input pipeline, in this case using Keras ImageDataGenerator
- Compose the model
- Load in the pretrained base model (and pretrained weights)
- Stack the classification layers on top
- Train the model
- Evaluate model
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler,EarlyStopping
from tensorflow.keras.optimizers import Adam
from matplotlib import pyplot as plt
from tensorflow.keras.models import load_model
from os.path import isfile
from tensorflow.keras.applications import VGG16,VGG19
from tensorflow.keras.models import Model
#from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
import os
def no_transfer_model(num_classes,input_shape):
    '''Build an untrained VGG11-style convolutional classifier.

    Args:
        num_classes (int): width of the final softmax layer
        input_shape (tuple): shape handed to the ``Input`` layer
    Returns:
        model (obj): Keras ``Model`` mapping the input to class probabilities
    '''
    X_in = Input(input_shape)
    X = Conv2D(64,3,activation='relu',padding='same')(X_in)
    X = MaxPool2D(pool_size=(2,2))(X)
    X = Conv2D(128,3,activation='relu',padding='same')(X)
    X = MaxPool2D(pool_size=(2,2))(X)
    X = Conv2D(256,3,activation='relu',padding='same')(X)
    X = Conv2D(256,3,activation='relu',padding='same')(X)
    X = MaxPool2D(pool_size=(2,2))(X)
    X = Conv2D(512,3,activation='relu',padding='same')(X)
    X = Conv2D(512,3,activation='relu',padding='same')(X)
    X = MaxPool2D(pool_size=(2,2))(X)
    # NOTE(review): 256 filters here breaks the 512/512 pattern of the
    # previous stage -- confirm whether this is deliberate.
    X = Conv2D(256,3,activation='relu',padding='same')(X)
    X = Conv2D(512,3,activation='relu',padding='same')(X)
    X = MaxPool2D(pool_size=(2,2))(X)
    X = Flatten()(X)
    X = Dense(4096,activation='relu')(X)
    X = Dropout(0.5)(X)
    X = Dense(4096,activation='relu')(X)
    X_out = Dense(num_classes,activation='softmax')(X)
    # tf.keras' Model takes `inputs=` / `outputs=`; the singular spellings
    # are not accepted.
    model = Model(inputs=X_in, outputs=X_out)
    return model
def model(model_name, num_classes, is_transfer, num_freeze_layer, weights_path,input_shape):
    '''Build the classifier to train.

    Args:
        model_name (str): 'simple' for the from-scratch network, or
            'mnNet_v2' for an ImageNet-pretrained MobileNetV2 base
        num_classes (int): width of the final softmax layer
        is_transfer (bool): when false the from-scratch network is returned
            regardless of ``model_name``
        num_freeze_layer (int): number of leading base-model layers marked
            non-trainable
        weights_path (str): only printed here; no weights are loaded from it
        input_shape (tuple): input shape handed to the network constructors
    Returns:
        final_model (obj): Keras ``Model``
    Raises:
        ValueError: transfer learning requested for an unsupported
            ``model_name`` (previously this surfaced as an unbound local)
    '''
    if not is_transfer or model_name == 'simple':
        scratch_model = no_transfer_model(num_classes,input_shape)
        print('how did i get here')
        return scratch_model

    if model_name != 'mnNet_v2':
        raise ValueError(
            "unsupported model_name for transfer learning: {!r} "
            "(only 'mnNet_v2' is available)".format(model_name))

    # pooling='avg' makes the base output the pooled feature vector.
    base_model = MobileNetV2(weights='imagenet',include_top=False,
                             input_shape=input_shape,pooling='avg')

    print(model_name)
    print(weights_path)
    print('------x-x-x-x-x-x---x-xx-x-x-x-xx------')
    print(input_shape)

    # freeze the given number of layers
    for layer in base_model.layers[:num_freeze_layer]:
        layer.trainable = False

    # Adding custom layers on top of the base output
    m_out = base_model.output
    m_flatten = Flatten()(m_out)
    m_dense = Dense(1024,activation='relu')(m_flatten)
    m_drop = Dropout(0.5)(m_dense)
    m_dense = Dense(1024,activation='relu')(m_drop)
    pred_out = Dense(num_classes,activation='softmax')(m_dense)

    # final_model: tf.keras' Model takes `inputs=` / `outputs=`
    final_model = Model(inputs=base_model.input,outputs=pred_out)
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    final_model.summary()
    return final_model
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img,img_to_array
from os.path import isdir,abspath,join
from os import listdir
# Directory locations used by the generator helpers in this section.
data_path = './data/' # path of the data
train_path = './train'
test_path = './test'
def load_img_arr(p):
    """Read the image file at path ``p`` and return it converted to an array."""
    loaded = load_img(p)
    return img_to_array(loaded)
def train_val_generator(
        batch_size, train_path,test_path,target_size=(136,136)):
    """Create the training and validation directory iterators.

    Both augmenters are fitted on the same sample: up to ``batch_size``
    images from each class sub-directory of ``train_path``.

    Args:
        batch_size (int): batch size of both iterators, and the per-class
            cap on the fitting sample.
        train_path (str): root folder of the training images.
        test_path (str): root folder of the validation images.
        target_size (tuple): size handed to ``flow_from_directory``.

    Returns:
        (train_iterator, validation_iterator)
    """
    train_aug = _train_generator()
    val_aug = _val_generator()

    fit_images = np.array(
        apply_to_images_in_subdirs(
            train_path, load_img_arr, num_per_cls=batch_size))
    for augmenter in (train_aug, val_aug):
        augmenter.fit(fit_images)

    train_flow = init_directory_generator(
        train_aug, train_path, batch_size, target_size=target_size)
    val_flow = init_directory_generator(
        val_aug, test_path, batch_size, target_size=target_size)
    return train_flow, val_flow
def _train_generator():
    """Return the augmenting ``ImageDataGenerator`` used for training data."""
    normalisation = dict(
        featurewise_center=True,
        featurewise_std_normalization=True,
    )
    augmentation = dict(
        rotation_range=180,
        zoom_range=0.2,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='reflect',
    )
    return ImageDataGenerator(**normalisation, **augmentation)
def _val_generator():
    """Return the ``ImageDataGenerator`` for validation data (normalisation only)."""
    normalisation = dict(
        featurewise_center=True,
        featurewise_std_normalization=True,
    )
    return ImageDataGenerator(**normalisation)
def apply_to_images_in_subdirs(parent_dir, func, num_per_cls=None, **kwargs):
    """Apply ``func`` to files in each sub-directory of ``parent_dir``.

    Args:
        parent_dir (str): folder whose entries are treated as class folders.
        func: callable invoked as ``func(absolute_path, **kwargs)``.
        num_per_cls (int | None): cap on files taken per class folder;
            ``None`` takes all of them.
        **kwargs: forwarded unchanged to ``func``.

    Returns:
        list: the results from all class folders, concatenated.
    """
    collected = []
    for entry in listdir(parent_dir):
        class_dir = abspath(join(parent_dir, entry))
        collected.extend(
            _apply_to_first_n_in_dir(func, class_dir, num_per_cls, **kwargs))
    return collected
def _apply_to_first_n_in_dir(func, dir_, num_per_cls, **kwargs):
if not isdir(dir_):
return []
results = []
for path in listdir(dir_)[:num_per_cls]:
abspath_ = abspath(join(dir_, path))
result = func(abspath_, **kwargs)
results.append(result)
return results
def init_directory_generator(
        gen, dir_,batch_size, target_size=(136,136),
        class_mode='categorical',shuffle=True):
    """Return ``gen.flow_from_directory`` for ``dir_`` with the given options.

    Args:
        gen: object exposing ``flow_from_directory``.
        dir_ (str): directory passed as the first positional argument.
        batch_size, target_size, class_mode, shuffle: forwarded as keyword
            arguments of the same names.
    """
    flow_options = dict(
        class_mode=class_mode,
        batch_size=batch_size,
        target_size=target_size,
        shuffle=shuffle,
    )
    return gen.flow_from_directory(dir_, **flow_options)
# Training configuration.
model_name ='mnNet_v2' #'vgg_16','vgg_19','mnNet_v2','resnet_152','simple'])
is_transfer=True
num_freeze_layer=5
num_classes=4
weights_path='mnNet_v2_weights_tf.h5'
input_shape=(224,224,3)#(img_width,img_height,img_channel)')
batch_size=32
train_path='./train'
test_path='./test'

# model for training
tr_model = model(model_name,num_classes,is_transfer, num_freeze_layer,weights_path,input_shape)

# train and test generator
# Feed images at the size the model was built for; the generator default of
# 136x136 does not match the 224x224 input_shape above.
train_gen, val_gen = train_val_generator(
    batch_size,train_path,test_path,target_size=input_shape[:2])